@@ -19,7 +19,7 @@ module Agents |
||
19 | 19 |
|
20 | 20 |
Options: |
21 | 21 |
|
22 |
- * `url` - The URL of the RSS feed. |
|
22 |
+ * `url` - The URL of the RSS feed (an array of URLs can also be used; items with identical guids across feeds will be considered duplicates). |
|
23 | 23 |
* `clean` - Attempt to use [feed-normalizer](https://github.com/aasmith/feed-normalizer)'s `clean!` method to clean up HTML in the feed. Set to `true` to use. |
24 | 24 |
* `expected_update_period_in_days` - How often you expect this RSS feed to change. If more than this amount of time passes without an update, the Agent will mark itself as not working. |
25 | 25 |
* `headers` - When present, it should be a hash of headers to send with the request. |
@@ -70,32 +70,34 @@ module Agents |
||
70 | 70 |
end |
71 | 71 |
|
72 | 72 |
def check |
73 |
- response = faraday.get(interpolated['url']) |
|
74 |
- if response.success? |
|
75 |
- feed = FeedNormalizer::FeedNormalizer.parse(response.body) |
|
76 |
- feed.clean! if interpolated['clean'] == 'true' |
|
77 |
- created_event_count = 0 |
|
78 |
- feed.entries.each do |entry| |
|
79 |
- entry_id = get_entry_id(entry) |
|
80 |
- if check_and_track(entry_id) |
|
81 |
- created_event_count += 1 |
|
82 |
- create_event(payload: { |
|
83 |
- id: entry_id, |
|
84 |
- date_published: entry.date_published, |
|
85 |
- last_updated: entry.last_updated, |
|
86 |
- url: entry.url, |
|
87 |
- urls: entry.urls, |
|
88 |
- description: entry.description, |
|
89 |
- content: entry.content, |
|
90 |
- title: entry.title, |
|
91 |
- authors: entry.authors, |
|
92 |
- categories: entry.categories |
|
93 |
- }) |
|
73 |
+ Array(interpolated['url']).each do |url| |
|
74 |
+ response = faraday.get(url) |
|
75 |
+ if response.success? |
|
76 |
+ feed = FeedNormalizer::FeedNormalizer.parse(response.body) |
|
77 |
+ feed.clean! if interpolated['clean'] == 'true' |
|
78 |
+ created_event_count = 0 |
|
79 |
+ feed.entries.each do |entry| |
|
80 |
+ entry_id = get_entry_id(entry) |
|
81 |
+ if check_and_track(entry_id) |
|
82 |
+ created_event_count += 1 |
|
83 |
+ create_event(payload: { |
|
84 |
+ id: entry_id, |
|
85 |
+ date_published: entry.date_published, |
|
86 |
+ last_updated: entry.last_updated, |
|
87 |
+ url: entry.url, |
|
88 |
+ urls: entry.urls, |
|
89 |
+ description: entry.description, |
|
90 |
+ content: entry.content, |
|
91 |
+ title: entry.title, |
|
92 |
+ authors: entry.authors, |
|
93 |
+ categories: entry.categories |
|
94 |
+ }) |
|
95 |
+ end |
|
94 | 96 |
end |
97 |
+ log "Fetched #{interpolated['url']} and created #{created_event_count} event(s)." |
|
98 |
+ else |
|
99 |
+ error "Failed to fetch #{interpolated['url']}: #{response.inspect}" |
|
95 | 100 |
end |
96 |
- log "Fetched #{interpolated['url']} and created #{created_event_count} event(s)." |
|
97 |
- else |
|
98 |
- error "Failed to fetch #{interpolated['url']}: #{response.inspect}" |
|
99 | 101 |
end |
100 | 102 |
end |
101 | 103 |
|
@@ -25,6 +25,9 @@ describe Agents::RssAgent do |
||
25 | 25 |
agent.options['url'] = "http://google.com" |
26 | 26 |
expect(agent).to be_valid |
27 | 27 |
|
28 |
+ agent.options['url'] = ["http://google.com", "http://yahoo.com"] |
|
29 |
+ expect(agent).to be_valid |
|
30 |
+ |
|
28 | 31 |
agent.options['url'] = "" |
29 | 32 |
expect(agent).not_to be_valid |
30 | 33 |
|
@@ -82,6 +85,15 @@ describe Agents::RssAgent do |
||
82 | 85 |
agent.check |
83 | 86 |
expect(agent.memory['seen_ids'].length).to eq(500) |
84 | 87 |
end |
88 |
+ |
|
89 |
+ it "should support an array of URLs" do |
|
90 |
+ agent.options['url'] = ["https://github.com/cantino/huginn/commits/master.atom", "http://feeds.feedburner.com/SlickdealsnetFP?format=atom"] |
|
91 |
+ agent.save! |
|
92 |
+ |
|
93 |
+ expect { |
|
94 |
+ agent.check |
|
95 |
+ }.to change { agent.events.count }.by(20 + 79) |
|
96 |
+ end |
|
85 | 97 |
end |
86 | 98 |
|
87 | 99 |
context "when no ids are available" do |